OHI British Columbia | OHI Science | Citation policy
knitr::opts_chunk$set(fig.width = 6, fig.height = 4, fig.path = 'Figs/',
echo = TRUE, message = FALSE, warning = FALSE)
library(raster)
library(data.table)
source('~/github/src/R/common.R') ###
### includes library(tidyverse); library(stringr); dir_M points to ohi directory
dir_git <- '~/github/spp_health_dists'
dir_am_data <- file.path(dir_M, 'git-annex/globalprep/_raw_data/aquamaps/d2017')
### goal specific folders and info
dir_setup <- file.path(dir_git, 'data_setup')
dir_data <- file.path(dir_git, 'data')
dir_o_anx <- file.path(dir_O, 'git-annex/spp_health_dists')
# ### provenance tracking
# library(provRmd); prov_setup()
### support scripts
source('~/github/src/R/rast_tools.R')
### raster plotting and analyzing scriptsCreate a map of the distribution of biodiversity - all species in AquaMaps assessed by IUCN (using AquaMaps version of IUCN ID numbers). These maps will be generated using all taxonomic groups:
The following maps will also be generated for taxonomic groups (IUCN-assessed species only): * Mean risk * Variance of risk * Number of species
Species richness as the raw count of species indicated to exist in a particular cell. This is calculated for all species included in the AquaMaps dataset, regardless of IUCN assessment status.
We can consider species presence in several ways:
spp_richness_file <- file.path(dir_data, 'am_all_spp_richness.csv')
if(!file.exists(spp_richness_file)) {
am_spp_cells <- fread(file.path(dir_o_anx, 'am_files/am_spp_cells_all.csv'),
verbose = FALSE)
spp_richness_0pct_probwt <- am_spp_cells %>%
group_by(loiczid) %>%
summarize(n_spp_0pct = n(),
n_spp_probwt = sum(prob))
spp_richness_60pct <- am_spp_cells %>%
filter(prob >= .60) %>%
group_by(loiczid) %>%
summarize(n_spp_60pct = n())
spp_richness <- spp_richness_0pct_probwt %>%
full_join(spp_richness_60pct, by = 'loiczid')
write_csv(spp_richness, spp_richness_file)
}spp_richness_file <- file.path(dir_data, 'am_all_spp_richness.csv')
spp_richness <- read_csv(spp_richness_file, col_types = 'dddd') %>%
mutate(log_n_probwt = log(n_spp_probwt))
loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))
rich_rast_0pct <- subs(loiczid_rast, spp_richness, by = 'loiczid', which = 'n_spp_0pct')
rich_rast_60pct <- subs(loiczid_rast, spp_richness, by = 'loiczid', which = 'n_spp_60pct')
rich_rast_probwt <- subs(loiczid_rast, spp_richness, by = 'loiczid', which = 'n_spp_probwt')
rich_rast_log_probwt <- subs(loiczid_rast, spp_richness, by = 'loiczid', which = 'log_n_probwt')
plot(rich_rast_0pct, main = 'Richness, All species, any presence')plot(rich_rast_60pct, main = 'Richness, All species, prob of occur >= 60%')plot(rich_rast_probwt, main = 'Richness, All species, weighted by prob of occur')plot(rich_rast_log_probwt, main = 'Richness, All species, log(prob of occur weighted))')Species range rarity is a measure of endemism that weighs the presence of a species in a particular area relative to its entire range. It is defined in Selig et al 2014 (and referenced from Williams et al. 1996: Promise and problems in applying quantitative complementary areas for representing the diversity of some neotropical plants)
\[Range \hspace{3pt} rarity_{cell} = \sum_{i=1}^N \frac{1}{A_i} \times w\]
Relative range rarity is range rarity divided by species richness to remove confounding effects (again as defined in Selig et al 2014).
As in Selig et al 2014, we will multiply rarity by 100,000 and relative rarity by 1000 for analytical convenience.
This is calculated for all species included in the AquaMaps dataset, regardless of IUCN assessment status.
We will calculate range rarity using the same three cuts as for richness; for each cut, species presence, total range, and richness will all be calculated using the same cut method.
spp_rarity_file <- file.path(dir_data, 'am_all_spp_rarity.csv')
if(!file.exists(spp_rarity_file)) {
if(!exists('am_spp_cells')) {
am_spp_cells <- fread(file.path(dir_o_anx, 'am_files/am_spp_cells_all.csv'),
verbose = FALSE)
}
am_spp_ranges <- read_csv(file.path(dir_data, 'am_spp_range_summary.csv'))
spp_cells_ranges <- am_spp_cells %>%
left_join(am_spp_ranges, by = 'am_sid')
spp_rarity_0pct_probwt <- spp_cells_ranges %>%
group_by(loiczid) %>%
summarize(rarity_0pct = sum( 1 / area_km2_0pct),
rarity_probwt = sum(prob / area_km2_probwt))
spp_rarity_60pct <- spp_cells_ranges %>%
filter(prob >= .60) %>%
group_by(loiczid) %>%
summarize(rarity_60pct = sum( 1 / area_km2_60pct, na.rm = TRUE))
spp_rarity <- spp_rarity_0pct_probwt %>%
full_join(spp_rarity_60pct, by = 'loiczid') %>%
mutate(rarity_0pct = rarity_0pct * 100000,
rarity_60pct = rarity_60pct * 100000,
rarity_probwt = rarity_probwt * 100000)
write_csv(spp_rarity, spp_rarity_file)
}spp_rarity <- read_csv(file.path(dir_data, 'am_all_spp_rarity.csv'),
col_types = 'dddd')
spp_richness <- read_csv(file.path(dir_data, 'am_all_spp_richness.csv'),
col_types = 'dddd')
spp_rel_rare <- spp_rarity %>%
left_join(spp_richness, by = 'loiczid') %>%
mutate(rel_rare_0pct = rarity_0pct / n_spp_0pct * 1000,
rel_rare_60pct = rarity_60pct / n_spp_60pct * 1000,
rel_rare_probwt = rarity_probwt / n_spp_probwt * 1000) %>%
select(loiczid, contains('rel_rare'))
write_csv(spp_rel_rare, file.path(dir_data, 'am_all_spp_rel_rarity.csv'))spp_rarity <- read_csv(file.path(dir_data, 'am_all_spp_rarity.csv'),
col_types = 'dddd')
spp_rel_rare <- read_csv(file.path(dir_data, 'am_all_spp_rel_rarity.csv'),
col_types = 'dddd')
loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))
rare_rast_0pct <- subs(loiczid_rast, spp_rarity,
by = 'loiczid', which = 'rarity_0pct')
rare_rast_60pct <- subs(loiczid_rast, spp_rarity,
by = 'loiczid', which = 'rarity_60pct')
rare_rast_probwt <- subs(loiczid_rast, spp_rarity,
by = 'loiczid', which = 'rarity_probwt')
plot(rare_rast_0pct, main = 'Range rarity, All species, any presence')plot(rare_rast_60pct, main = 'Range rarity, All species, prob of occur >= 60%')plot(rare_rast_probwt, main = 'Range rarity, All species, weighted by prob of occur')### relative range rarity
rel_rare_rast_0pct <- subs(loiczid_rast, spp_rel_rare,
by = 'loiczid', which = 'rel_rare_0pct')
rel_rare_rast_60pct <- subs(loiczid_rast, spp_rel_rare,
by = 'loiczid', which = 'rel_rare_60pct')
rel_rare_rast_probwt <- subs(loiczid_rast, spp_rel_rare,
by = 'loiczid', which = 'rel_rare_probwt')
plot(rel_rare_rast_0pct, main = 'Rel range rarity, All spp, any presence')plot(rel_rare_rast_60pct, main = 'Rel range rarity, All spp, prob of occur >= 60%')plot(rel_rare_rast_probwt, main = 'Rel range rarity, All spp, weighted by prob of occur')As in the species richness and rarity calculations, we will calculate mean risk using three different considerations of species presence: all presence (non-zero probability of occurrence), “core habitat” (60% or greater prob of occurrence), and probability-weighted.
This binds rows into a long dataframe, but the saved file is rather large. By rounding the decimals a bit, we can shave off quite a bit of file size.
iucn_version <- '2017-3'
cell_risk_file <- file.path(dir_data, sprintf('risk_by_cell_all_%s.csv', iucn_version))
iucn_to_am_lookup <- read_csv(file.path(dir_setup, 'int',
'am_iucn_crosslisted_spp.csv')) %>%
select(am_sid = speciesid, iucn_sid = iucn_id) %>%
distinct()
spp_current_risk <- read_csv(file.path(dir_data,
sprintf('iucn_risk_current_%s.csv', iucn_version)),
col_types = cols_only(iucn_sid = 'i', cat = 'c', cat_score = 'd')) %>%
select(iucn_sid, cat, cat_score) %>%
left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
as.data.table()
spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells_assessed.csv'),
verbose = FALSE)##
Read 42.8% of 16116125 rows
Read 62.5% of 16116125 rows
Read 83.6% of 16116125 rows
Read 16116125 rows and 3 (of 3) columns from 0.338 GB file in 00:00:05
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
### using data.table join:
spp_cells_risk <- spp_current_risk[spp_cells, on = 'am_sid'] %>%
filter(!is.na(cat_score) & !is.na(iucn_sid))
spp_cells_risk_sum_0pct <- spp_cells_risk %>%
filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
# filter(prob >= 0.60) %>%
group_by(loiczid) %>%
summarize(n_spp = n(),
mean_risk = mean(cat_score),
var_risk = var(cat_score),
presence = '0%')
spp_cells_risk_sum_60pct <- spp_cells_risk %>%
filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
filter(prob >= 0.60) %>%
group_by(loiczid) %>%
summarize(n_spp = n(),
mean_risk = mean(cat_score),
var_risk = var(cat_score),
presence = '60%')
### For the prob weighted calculations, weight the mean and var:
### mean = 1/n * sum(y)
### ==> mean = 1/sum(prob) * sum(y * prob)
### var = 1/n * sum[y - E(y)]^2
### ==> var = 1/sum(prob) * sum([y - E(y)]^2 * prob)
spp_cells_risk_sum_probwt <- spp_cells_risk %>%
filter(!is.na(cat_score) & !is.na(iucn_sid)) %>%
# filter(prob >= 0.60) %>%
group_by(loiczid) %>%
summarize(n_spp = sum(prob),
mean_risk = 1/n_spp * sum(cat_score * prob),
var_risk = 1/n_spp * sum((cat_score - mean_risk)^2 * prob),
presence = 'prob')
spp_cells_risk_sum <- bind_rows(spp_cells_risk_sum_0pct,
spp_cells_risk_sum_60pct,
spp_cells_risk_sum_probwt) %>%
mutate(var_risk = ifelse(mean_risk == 0 & is.na(var_risk), 0, var_risk),
mean_risk = round(mean_risk, 6),
var_risk = round(var_risk, 6))
### if mean risk == 0, all risks are zero, so zero variance.
write_csv(spp_cells_risk_sum, cell_risk_file)loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))
spp_cells_risk_sum <- read_csv(cell_risk_file,
col_types = 'idddc')
cuts <- c('0%', '60%', 'prob')
for(cut in cuts) { ### cut <- cuts[3]
df_cut <- spp_cells_risk_sum %>%
filter(presence == cut)
cat(sprintf('#### Mean, variance, and richness based on %s', cut))
risk_rast <- subs(loiczid_rast, df_cut, by = 'loiczid', which = 'mean_risk')
var_rast <- subs(loiczid_rast, df_cut, by = 'loiczid', which = 'var_risk')
nspp_rast <- subs(loiczid_rast, df_cut, by = 'loiczid', which = 'n_spp')
plot(risk_rast, main = sprintf('Mean risk, cut = %s', cut))
plot(var_rast, main = sprintf('Variance of risk, cut = %s', cut))
plot(nspp_rast, main = sprintf('N spp (risk assessed), cut = %s', cut))
}Again we will use a probability threshold of 0.60 to determine species presence. We filter out any species with NA for iucn_sid or trend_score.
cell_trend_file <- file.path(dir_data, sprintf('iucn_trend_by_cell_%s.csv', iucn_version))
iucn_to_am_lookup <- read_csv(file.path(dir_data, 'am_iucn_crosslisted_spp.csv')) %>%
select(am_sid = speciesid, iucn_sid = iucn_id) %>%
distinct()
spp_trend <- read_csv(file.path(dir_data, 'trend_by_spp.csv')) %>%
left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
as.data.table()
if(!exists('spp_cells')) {
spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells_assessed.csv'),
verbose = FALSE)
}
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
### using data.table join:
spp_cells_trend <- spp_trend[spp_cells, on = 'am_sid']
spp_cells_trend_sum_0pct <- spp_cells_trend %>%
filter(!is.na(trend_score) & !is.na(iucn_sid)) %>%
# filter(prob >= 0.60) %>%
group_by(loiczid) %>%
summarize(n_spp = n(),
mean_trend = mean(trend_score),
var_trend = var(trend_score),
presence = '0%')
spp_cells_trend_sum_60pct <- spp_cells_trend %>%
filter(!is.na(trend_score) & !is.na(iucn_sid)) %>%
filter(prob >= 0.60) %>%
group_by(loiczid) %>%
summarize(n_spp = n(),
mean_trend = mean(trend_score),
var_trend = var(trend_score),
presence = '60%')
spp_cells_trend_sum_probwt <- spp_cells_trend %>%
filter(!is.na(trend_score) & !is.na(iucn_sid)) %>%
# filter(prob >= 0.60) %>%
group_by(loiczid) %>%
summarize(n_spp = sum(prob),
mean_trend = 1/n_spp * sum(trend_score * prob),
var_trend = 1/n_spp * sum((trend_score - mean_trend)^2 * prob),
presence = 'prob')
spp_cells_trend_sum <- bind_rows(spp_cells_trend_sum_0pct,
spp_cells_trend_sum_60pct,
spp_cells_trend_sum_probwt) %>%
mutate(var_trend = ifelse(mean_trend == 0 & is.na(var_trend), 0, var_trend),
mean_trend = round(mean_trend, 6),
var_trend = round(var_trend, 6))
write_csv(spp_cells_trend_sum, cell_trend_file)loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))
### gotta force the mean_trend column to be double; there are lots of zero
### values so will default to thinking that column is integer.
cell_trend_file <- file.path(dir_data, sprintf('iucn_trend_by_cell_%s.csv', iucn_version))
spp_cells_trend_sum <- read_csv(cell_trend_file,
col_types = 'idddc')
cuts <- c('0%', '60%', 'prob')
for(cut in cuts) { ### cut <- cuts[3]
df_cut <- spp_cells_trend_sum %>%
filter(presence == cut)
cat(sprintf('#### Mean, variance, and richness based on %s', cut))
risk_rast <- subs(loiczid_rast, df_cut, by = 'loiczid', which = 'mean_trend')
var_rast <- subs(loiczid_rast, df_cut, by = 'loiczid', which = 'var_trend')
nspp_rast <- subs(loiczid_rast, df_cut, by = 'loiczid', which = 'n_spp')
plot(risk_rast, main = sprintf('Mean trend, cut = %s', cut))
plot(var_rast, main = sprintf('Variance of trend, cut = %s', cut))
plot(nspp_rast, main = sprintf('N spp (trend assessed), cut = %s', cut))
}Threatened species are those listed by the IUCN as Vulnerable, Endangered, or Critically Endangered. We can count threatened species the same way we determined species richness, filtering on the category (or the category score) to exclude LC, NT, and EX.
iucn_version <- '2017-3'
cell_threatened_file <- file.path(dir_data,
sprintf('threatened_spp_by_cell_all_%s.csv', iucn_version))
iucn_to_am_lookup <- read_csv(file.path(dir_setup, 'int',
'am_iucn_crosslisted_spp.csv')) %>%
select(am_sid = speciesid, iucn_sid = iucn_id) %>%
distinct()
spp_threatened <- read_csv(file.path(dir_data,
sprintf('iucn_risk_current_%s.csv', iucn_version)),
col_types = cols_only(iucn_sid = 'i', cat = 'c', cat_score = 'd')) %>%
filter(cat %in% c('VU', 'EN', 'CR')) %>%
left_join(iucn_to_am_lookup, by = 'iucn_sid') %>%
as.data.table()
spp_cells <- fread(file.path(dir_o_anx, 'am_files', 'am_spp_cells_assessed.csv'),
verbose = FALSE)##
Read 13.1% of 16116125 rows
Read 32.9% of 16116125 rows
Read 53.0% of 16116125 rows
Read 73.6% of 16116125 rows
Read 93.8% of 16116125 rows
Read 16116125 rows and 3 (of 3) columns from 0.338 GB file in 00:00:07
# git_prov(file.path(dir_o_anx, 'am_files', 'am_spp_cells.csv'), filetype = 'input')
### using data.table join:
spp_cells_threatened <- spp_threatened[spp_cells, on = 'am_sid'] %>%
filter(!is.na(cat_score) & !is.na(iucn_sid))
spp_cells_threat_sum_0pct <- spp_cells_threatened %>%
group_by(loiczid) %>%
summarize(n_threatened = n(),
presence = '0%')
spp_cells_threat_sum_60pct <- spp_cells_threatened %>%
filter(prob >= 0.60) %>%
group_by(loiczid) %>%
summarize(n_threatened = n(),
presence = '60%')
spp_cells_threat_sum_probwt <- spp_cells_threatened %>%
group_by(loiczid) %>%
summarize(n_threatened = sum(prob),
presence = 'prob')
spp_cells_threat_sum <- bind_rows(spp_cells_threat_sum_0pct,
spp_cells_threat_sum_60pct,
spp_cells_threat_sum_probwt)
write_csv(spp_cells_threat_sum, cell_threatened_file)loiczid_rast <- raster(file.path(dir_git, 'spatial/loiczid_raster.tif'))
spp_cells_threat_sum <- read_csv(cell_threatened_file,
col_types = 'idc')
cuts <- c('0%', '60%', 'prob')
for(cut in cuts) { ### cut <- cuts[3]
df_cut <- spp_cells_threat_sum %>%
filter(presence == cut)
cat(sprintf('#### Number of threatened species based on %s', cut))
threatened_rast <- subs(loiczid_rast, df_cut, by = 'loiczid', which = 'n_threatened')
plot(threatened_rast, main = sprintf('N of threatened spp, cut = %s', cut))
}